{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>case_num</th>\n",
       "      <th>diagnosis</th>\n",
       "      <th>seven_point_score</th>\n",
       "      <th>pigment_network</th>\n",
       "      <th>streaks</th>\n",
       "      <th>pigmentation</th>\n",
       "      <th>regression_structures</th>\n",
       "      <th>dots_and_globules</th>\n",
       "      <th>blue_whitish_veil</th>\n",
       "      <th>vascular_structures</th>\n",
       "      <th>...</th>\n",
       "      <th>case_id</th>\n",
       "      <th>notes</th>\n",
       "      <th>diagnosis_numeric</th>\n",
       "      <th>pigment_network_numeric</th>\n",
       "      <th>blue_whitish_veil_numeric</th>\n",
       "      <th>vascular_structures_numeric</th>\n",
       "      <th>pigmentation_numeric</th>\n",
       "      <th>streaks_numeric</th>\n",
       "      <th>dots_and_globules_numeric</th>\n",
       "      <th>regression_structures_numeric</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>basal cell carcinoma</td>\n",
       "      <td>0</td>\n",
       "      <td>absent</td>\n",
       "      <td>absent</td>\n",
       "      <td>absent</td>\n",
       "      <td>absent</td>\n",
       "      <td>absent</td>\n",
       "      <td>absent</td>\n",
       "      <td>arborizing</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>basal cell carcinoma</td>\n",
       "      <td>1</td>\n",
       "      <td>absent</td>\n",
       "      <td>absent</td>\n",
       "      <td>absent</td>\n",
       "      <td>absent</td>\n",
       "      <td>irregular</td>\n",
       "      <td>absent</td>\n",
       "      <td>absent</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>basal cell carcinoma</td>\n",
       "      <td>1</td>\n",
       "      <td>absent</td>\n",
       "      <td>absent</td>\n",
       "      <td>absent</td>\n",
       "      <td>absent</td>\n",
       "      <td>irregular</td>\n",
       "      <td>absent</td>\n",
       "      <td>arborizing</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>basal cell carcinoma</td>\n",
       "      <td>4</td>\n",
       "      <td>absent</td>\n",
       "      <td>absent</td>\n",
       "      <td>absent</td>\n",
       "      <td>blue areas</td>\n",
       "      <td>irregular</td>\n",
       "      <td>present</td>\n",
       "      <td>within regression</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>basal cell carcinoma</td>\n",
       "      <td>1</td>\n",
       "      <td>absent</td>\n",
       "      <td>absent</td>\n",
       "      <td>diffuse irregular</td>\n",
       "      <td>absent</td>\n",
       "      <td>absent</td>\n",
       "      <td>absent</td>\n",
       "      <td>absent</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   case_num             diagnosis  seven_point_score pigment_network streaks  \\\n",
       "0         1  basal cell carcinoma                  0          absent  absent   \n",
       "1         2  basal cell carcinoma                  1          absent  absent   \n",
       "2         3  basal cell carcinoma                  1          absent  absent   \n",
       "3         4  basal cell carcinoma                  4          absent  absent   \n",
       "4         5  basal cell carcinoma                  1          absent  absent   \n",
       "\n",
       "        pigmentation regression_structures dots_and_globules  \\\n",
       "0             absent                absent            absent   \n",
       "1             absent                absent         irregular   \n",
       "2             absent                absent         irregular   \n",
       "3             absent            blue areas         irregular   \n",
       "4  diffuse irregular                absent            absent   \n",
       "\n",
       "  blue_whitish_veil vascular_structures              ...                \\\n",
       "0            absent          arborizing              ...                 \n",
       "1            absent              absent              ...                 \n",
       "2            absent          arborizing              ...                 \n",
       "3           present   within regression              ...                 \n",
       "4            absent              absent              ...                 \n",
       "\n",
       "  case_id notes diagnosis_numeric pigment_network_numeric  \\\n",
       "0     NaN   NaN                 0                       0   \n",
       "1     NaN   NaN                 0                       0   \n",
       "2     NaN   NaN                 0                       0   \n",
       "3     NaN   NaN                 0                       0   \n",
       "4     NaN   NaN                 0                       0   \n",
       "\n",
       "  blue_whitish_veil_numeric vascular_structures_numeric pigmentation_numeric  \\\n",
       "0                         0                           1                    0   \n",
       "1                         0                           0                    0   \n",
       "2                         0                           1                    0   \n",
       "3                         1                           1                    0   \n",
       "4                         0                           0                    2   \n",
       "\n",
       "  streaks_numeric dots_and_globules_numeric  regression_structures_numeric  \n",
       "0               0                         0                              0  \n",
       "1               0                         2                              0  \n",
       "2               0                         2                              0  \n",
       "3               0                         2                              1  \n",
       "4               0                         0                              0  \n",
       "\n",
       "[5 rows x 27 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "# Put the parent of the current working directory on sys.path so that the\n",
    "# local `derm7pt` package can be imported.\n",
    "sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))\n",
    "from derm7pt.dataset import Derm7PtDatasetGroupInfrequent\n",
    "\n",
    "# Root directory of the released data. Set the DERM7PT_DIR environment\n",
    "# variable to point at your copy, or change the fallback path below.\n",
    "dir_release = os.environ.get(\n",
    "    'DERM7PT_DIR', '/local-scratch/jer/data/argenziano/release_v0')\n",
    "dir_meta = os.path.join(dir_release, 'meta')\n",
    "\n",
    "\n",
    "def read_indexes(csv_name):\n",
    "    \"\"\"Return the 'indexes' column of a CSV file in `dir_meta` as a list.\"\"\"\n",
    "    return list(pd.read_csv(os.path.join(dir_meta, csv_name))['indexes'])\n",
    "\n",
    "\n",
    "# Dataset after grouping infrequent labels.\n",
    "derm_data = Derm7PtDatasetGroupInfrequent(\n",
    "    dir_images=os.path.join(dir_release, 'images'),\n",
    "    metadata_df=pd.read_csv(os.path.join(dir_meta, 'meta.csv')),\n",
    "    train_indexes=read_indexes('train_indexes.csv'),\n",
    "    valid_indexes=read_indexes('valid_indexes.csv'),\n",
    "    test_indexes=read_indexes('test_indexes.csv'))\n",
    "\n",
    "# Display the first 5 rows of the preprocessed dataset (a pandas DataFrame).\n",
    "derm_data.df.head(n=5)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
